Python 3 (ipykernel)
x
!pip install Keras-Preprocessingimport numpy as npimport pandas as pdimport nltkimport tensorflow as tffrom nltk.corpus import stopwordsfrom nltk.corpus import reutersfrom nltk.corpus import brownfrom nltk.corpus import gutenbergfrom nltk.tokenize import RegexpTokenizerimport matplotlib.pyplot as pltimport seaborn as snsimport plotly.express as pximport pickleimport joblibfrom collections import Counterfrom textblob import Wordfrom wordcloud import WordCloudfrom sklearn.feature_extraction.text import CountVectorizer, TfidfTransformerfrom keras.preprocessing.text import Tokenizerfrom keras_preprocessing.sequence import pad_sequencesRequirement already satisfied: Keras-Preprocessing in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (1.1.2) Requirement already satisfied: numpy>=1.9.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from Keras-Preprocessing) (1.24.3) Requirement already satisfied: six>=1.9.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from Keras-Preprocessing) (1.16.0)
!pip install ktrainimport ktrainfrom ktrain import textsns.set()%matplotlib inlinenltk.download('stopwords')nltk.download('wordnet')nltk.download('omw-1.4')nltk.download('gutenberg')nltk.download('brown')nltk.download("reuters")nltk.download('words')Requirement already satisfied: ktrain in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (0.41.3) Requirement already satisfied: scikit-learn in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (1.3.0) Requirement already satisfied: matplotlib>=3.0.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (3.7.2) Requirement already satisfied: pandas>=1.0.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (2.0.3) Requirement already satisfied: fastprogress>=0.1.21 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (1.0.3) Requirement already satisfied: requests in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (2.31.0) Requirement already satisfied: joblib in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (1.2.0) Requirement already satisfied: packaging in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (23.1) Requirement already satisfied: langdetect in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (1.0.9) Requirement already satisfied: jieba in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (0.42.1) Requirement already satisfied: charset-normalizer in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (2.0.4) Requirement already satisfied: chardet in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (4.0.0) Requirement already satisfied: syntok>1.3.3 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (1.4.4) Requirement already satisfied: tika in 
c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (2.6.0) Requirement already satisfied: transformers in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (4.32.1) Requirement already satisfied: sentencepiece in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (0.2.0) Requirement already satisfied: keras-bert>=0.86.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (0.89.0) Requirement already satisfied: whoosh in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from ktrain) (2.7.4) Requirement already satisfied: numpy in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from keras-bert>=0.86.0->ktrain) (1.24.3) Requirement already satisfied: keras-transformer==0.40.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from keras-bert>=0.86.0->ktrain) (0.40.0) Requirement already satisfied: keras-pos-embd==0.13.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.13.0) Requirement already satisfied: keras-multi-head==0.29.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.29.0) Requirement already satisfied: keras-layer-normalization==0.16.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.16.0) Requirement already satisfied: keras-position-wise-feed-forward==0.8.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.8.0) Requirement already satisfied: keras-embed-sim==0.10.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.10.0) Requirement already satisfied: keras-self-attention==0.51.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages 
(from keras-multi-head==0.29.0->keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.51.0) Requirement already satisfied: contourpy>=1.0.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from matplotlib>=3.0.0->ktrain) (1.0.5) Requirement already satisfied: cycler>=0.10 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from matplotlib>=3.0.0->ktrain) (0.11.0) Requirement already satisfied: fonttools>=4.22.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from matplotlib>=3.0.0->ktrain) (4.25.0) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from matplotlib>=3.0.0->ktrain) (1.4.4) Requirement already satisfied: pillow>=6.2.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from matplotlib>=3.0.0->ktrain) (9.4.0) Requirement already satisfied: pyparsing<3.1,>=2.3.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from matplotlib>=3.0.0->ktrain) (3.0.9) Requirement already satisfied: python-dateutil>=2.7 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from matplotlib>=3.0.0->ktrain) (2.8.2) Requirement already satisfied: pytz>=2020.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from pandas>=1.0.1->ktrain) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from pandas>=1.0.1->ktrain) (2023.3) Requirement already satisfied: regex>2016 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from syntok>1.3.3->ktrain) (2022.7.9) Requirement already satisfied: six in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from langdetect->ktrain) (1.16.0) Requirement already satisfied: idna<4,>=2.5 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from requests->ktrain) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from 
requests->ktrain) (1.26.16) Requirement already satisfied: certifi>=2017.4.17 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from requests->ktrain) (2023.7.22) Requirement already satisfied: scipy>=1.5.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from scikit-learn->ktrain) (1.11.1) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from scikit-learn->ktrain) (2.2.0) Requirement already satisfied: setuptools in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from tika->ktrain) (68.0.0) Requirement already satisfied: filelock in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from transformers->ktrain) (3.9.0) Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from transformers->ktrain) (0.15.1) Requirement already satisfied: pyyaml>=5.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from transformers->ktrain) (6.0) Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from transformers->ktrain) (0.13.2) Requirement already satisfied: safetensors>=0.3.1 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from transformers->ktrain) (0.3.2) Requirement already satisfied: tqdm>=4.27 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from transformers->ktrain) (4.65.0) Requirement already satisfied: fsspec in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from huggingface-hub<1.0,>=0.15.1->transformers->ktrain) (2023.4.0) Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from huggingface-hub<1.0,>=0.15.1->transformers->ktrain) (4.7.1) Requirement already satisfied: colorama in c:\users\guptasau\appdata\local\anaconda3\lib\site-packages (from 
tqdm>=4.27->transformers->ktrain) (0.4.6)
[nltk_data] Downloading package stopwords to [nltk_data] C:\Users\GuptaSau\AppData\Roaming\nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package wordnet to [nltk_data] C:\Users\GuptaSau\AppData\Roaming\nltk_data... [nltk_data] Package wordnet is already up-to-date! [nltk_data] Downloading package omw-1.4 to [nltk_data] C:\Users\GuptaSau\AppData\Roaming\nltk_data... [nltk_data] Package omw-1.4 is already up-to-date! [nltk_data] Downloading package gutenberg to [nltk_data] C:\Users\GuptaSau\AppData\Roaming\nltk_data... [nltk_data] Package gutenberg is already up-to-date! [nltk_data] Downloading package brown to [nltk_data] C:\Users\GuptaSau\AppData\Roaming\nltk_data... [nltk_data] Package brown is already up-to-date! [nltk_data] Downloading package reuters to [nltk_data] C:\Users\GuptaSau\AppData\Roaming\nltk_data... [nltk_data] Package reuters is already up-to-date! [nltk_data] Downloading package words to [nltk_data] C:\Users\GuptaSau\AppData\Roaming\nltk_data... [nltk_data] Package words is already up-to-date!
True
from tensorflow.keras import utilsutils.to_categoricalfrom wordcloud import WordCloud,ImageColorGeneratorfrom PIL import Imageimport urllibimport requestsfrom keras.regularizers import l2x
# Load the Twitter suicide-ideation dataset and normalise the label column:
# the raw CSV contains "Potential Suicide post " (note the trailing space),
# which must be collapsed into the canonical "Potential Suicide post" value.
df = pd.read_csv(
    'C:/Users/GuptaSau/Downloads/Suicide_Ideation_Dataset(Twitter-based).csv',
    engine='python',
    encoding='UTF-8',
)
df = df.replace('Potential Suicide post ', 'Potential Suicide post')
# Inspect the class balance and the raw frame.
df['Suicide'].value_counts()
df
|---|---|---|
| 0 | making some lunch | Not Suicide post |
| 1 | @Alexia You want his money. | Not Suicide post |
| 2 | @dizzyhrvy that crap took me forever to put to... | Potential Suicide post |
| 3 | @jnaylor #kiwitweets Hey Jer! Since when did y... | Not Suicide post |
| 4 | Trying out "Delicious Library 2" wit... | Not Suicide post |
| ... | ... | ... |
| 1782 | i have forgotten how much i love my Nokia N95-1 | Not Suicide post |
| 1783 | Starting my day out with a positive attitude! ... | Not Suicide post |
| 1784 | @belledame222 Hey, it's 5 am...give a girl som... | Not Suicide post |
| 1785 | 2 drunken besties stumble into my room and we ... | Not Suicide post |
| 1786 | @dancingbonita "I friggin love you!!!&quo... | Not Suicide post |
1787 rows × 2 columns
# Replace missing tweets with the empty string so the string operations
# below never see NaN, then build a lower-cased copy of the text.
df['Tweet'] = df['Tweet'].fillna("")
# Vectorised .str.lower() replaces the per-row lambda: identical output on
# the all-string column produced by fillna(""), but faster and idiomatic.
df['lower_case'] = df['Tweet'].str.lower()
# Tokenise the lower-cased text into word tokens; RegexpTokenizer(r'\w+')
# keeps alphanumeric runs and drops punctuation, @-mentions' sigils, '#',
# and other symbols.
tokenizer = RegexpTokenizer(r'\w+')
# Apply the tokenizer to the one column it needs instead of the original
# row-wise df.apply(..., axis=1): identical result, but avoids
# materialising every row as a Series.
df['special_word'] = df['lower_case'].apply(tokenizer.tokenize)
df
|---|---|---|---|---|
| 0 | making some lunch | Not Suicide post | making some lunch | [making, some, lunch] |
| 1 | @Alexia You want his money. | Not Suicide post | @alexia you want his money. | [alexia, you, want, his, money] |
| 2 | @dizzyhrvy that crap took me forever to put to... | Potential Suicide post | @dizzyhrvy that crap took me forever to put to... | [dizzyhrvy, that, crap, took, me, forever, to,... |
| 3 | @jnaylor #kiwitweets Hey Jer! Since when did y... | Not Suicide post | @jnaylor #kiwitweets hey jer! since when did y... | [jnaylor, kiwitweets, hey, jer, since, when, d... |
| 4 | Trying out "Delicious Library 2" wit... | Not Suicide post | trying out "delicious library 2" wit... | [trying, out, quot, delicious, library, 2, quo... |
| ... | ... | ... | ... | ... |
| 1782 | i have forgotten how much i love my Nokia N95-1 | Not Suicide post | i have forgotten how much i love my nokia n95-1 | [i, have, forgotten, how, much, i, love, my, n... |
| 1783 | Starting my day out with a positive attitude! ... | Not Suicide post | starting my day out with a positive attitude! ... | [starting, my, day, out, with, a, positive, at... |
| 1784 | @belledame222 Hey, it's 5 am...give a girl som... | Not Suicide post | @belledame222 hey, it's 5 am...give a girl som... | [belledame222, hey, it, s, 5, am, give, a, gir... |
| 1785 | 2 drunken besties stumble into my room and we ... | Not Suicide post | 2 drunken besties stumble into my room and we ... | [2, drunken, besties, stumble, into, my, room,... |
| 1786 | @dancingbonita "I friggin love you!!!&quo... | Not Suicide post | @dancingbonita "i friggin love you!!!&quo... | [dancingbonita, quot, i, friggin, love, you, q... |
1787 rows × 4 columns
x
# Stopword removal — but deliberately KEEP pronouns and negations that carry
# signal for suicide-ideation detection ("not", "don't", "myself", ...).
_keep = {
    "my", "haven't", "aren't", "can", "no", "why", "through", "herself",
    "she", "he", "himself", "you", "you're", "myself", "not", "here",
    "some", "do", "does", "did", "will", "don't", "doesn't", "didn't",
    "won't", "should", "should've", "couldn't", "mightn't", "mustn't",
    "shouldn't", "hadn't", "wasn't", "wouldn't",
}
# A set makes the per-token membership test O(1); the original list scanned
# ~140 stopwords for every token.
stop = {word for word in stopwords.words('english') if word not in _keep}
df['stop_words'] = df['special_word'].apply(
    lambda tokens: [item for item in tokens if item not in stop]
)
# NOTE: astype('str') stores the *string repr* of each token list
# (e.g. "['making', 'some', 'lunch']"); the later str.findall step relies
# on exactly this representation, so the conversion is kept deliberately.
df['stop_words'] = df['stop_words'].astype('str')
df
|---|---|---|---|---|---|
| 0 | making some lunch | Not Suicide post | making some lunch | [making, some, lunch] | ['making', 'some', 'lunch'] |
| 1 | @Alexia You want his money. | Not Suicide post | @alexia you want his money. | [alexia, you, want, his, money] | ['alexia', 'you', 'want', 'money'] |
| 2 | @dizzyhrvy that crap took me forever to put to... | Potential Suicide post | @dizzyhrvy that crap took me forever to put to... | [dizzyhrvy, that, crap, took, me, forever, to,... | ['dizzyhrvy', 'crap', 'took', 'forever', 'put'... |
| 3 | @jnaylor #kiwitweets Hey Jer! Since when did y... | Not Suicide post | @jnaylor #kiwitweets hey jer! since when did y... | [jnaylor, kiwitweets, hey, jer, since, when, d... | ['jnaylor', 'kiwitweets', 'hey', 'jer', 'since... |
| 4 | Trying out "Delicious Library 2" wit... | Not Suicide post | trying out "delicious library 2" wit... | [trying, out, quot, delicious, library, 2, quo... | ['trying', 'quot', 'delicious', 'library', '2'... |
| ... | ... | ... | ... | ... | ... |
| 1782 | i have forgotten how much i love my Nokia N95-1 | Not Suicide post | i have forgotten how much i love my nokia n95-1 | [i, have, forgotten, how, much, i, love, my, n... | ['forgotten', 'much', 'love', 'my', 'nokia', '... |
| 1783 | Starting my day out with a positive attitude! ... | Not Suicide post | starting my day out with a positive attitude! ... | [starting, my, day, out, with, a, positive, at... | ['starting', 'my', 'day', 'positive', 'attitud... |
| 1784 | @belledame222 Hey, it's 5 am...give a girl som... | Not Suicide post | @belledame222 hey, it's 5 am...give a girl som... | [belledame222, hey, it, s, 5, am, give, a, gir... | ['belledame222', 'hey', '5', 'give', 'girl', '... |
| 1785 | 2 drunken besties stumble into my room and we ... | Not Suicide post | 2 drunken besties stumble into my room and we ... | [2, drunken, besties, stumble, into, my, room,... | ['2', 'drunken', 'besties', 'stumble', 'my', '... |
| 1786 | @dancingbonita "I friggin love you!!!&quo... | Not Suicide post | @dancingbonita "i friggin love you!!!&quo... | [dancingbonita, quot, i, friggin, love, you, q... | ['dancingbonita', 'quot', 'friggin', 'love', '... |
1787 rows × 5 columns
x
# Drop words SHORTER than 2 characters: findall on the stringified token
# list extracts only \w-runs of length >= 2 (the repr's quotes, commas and
# brackets are not word characters, so they vanish for free), then the
# surviving tokens are joined back into a single cleaned string.
# Raw string avoids the invalid '\w' escape warning on Python 3.12+.
df['short_word'] = df['stop_words'].str.findall(r'\w{2,}')
df['string'] = df['short_word'].str.join(' ')
df
|---|---|---|---|---|---|---|---|
| 0 | making some lunch | Not Suicide post | making some lunch | [making, some, lunch] | ['making', 'some', 'lunch'] | [making, some, lunch] | making some lunch |
| 1 | @Alexia You want his money. | Not Suicide post | @alexia you want his money. | [alexia, you, want, his, money] | ['alexia', 'you', 'want', 'money'] | [alexia, you, want, money] | alexia you want money |
| 2 | @dizzyhrvy that crap took me forever to put to... | Potential Suicide post | @dizzyhrvy that crap took me forever to put to... | [dizzyhrvy, that, crap, took, me, forever, to,... | ['dizzyhrvy', 'crap', 'took', 'forever', 'put'... | [dizzyhrvy, crap, took, forever, put, together... | dizzyhrvy crap took forever put together iâ go... |
| 3 | @jnaylor #kiwitweets Hey Jer! Since when did y... | Not Suicide post | @jnaylor #kiwitweets hey jer! since when did y... | [jnaylor, kiwitweets, hey, jer, since, when, d... | ['jnaylor', 'kiwitweets', 'hey', 'jer', 'since... | [jnaylor, kiwitweets, hey, jer, since, did, yo... | jnaylor kiwitweets hey jer since did you start... |
| 4 | Trying out "Delicious Library 2" wit... | Not Suicide post | trying out "delicious library 2" wit... | [trying, out, quot, delicious, library, 2, quo... | ['trying', 'quot', 'delicious', 'library', '2'... | [trying, quot, delicious, library, quot, mixed... | trying quot delicious library quot mixed resul... |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1782 | i have forgotten how much i love my Nokia N95-1 | Not Suicide post | i have forgotten how much i love my nokia n95-1 | [i, have, forgotten, how, much, i, love, my, n... | ['forgotten', 'much', 'love', 'my', 'nokia', '... | [forgotten, much, love, my, nokia, n95] | forgotten much love my nokia n95 |
| 1783 | Starting my day out with a positive attitude! ... | Not Suicide post | starting my day out with a positive attitude! ... | [starting, my, day, out, with, a, positive, at... | ['starting', 'my', 'day', 'positive', 'attitud... | [starting, my, day, positive, attitude, great,... | starting my day positive attitude great watch ... |
| 1784 | @belledame222 Hey, it's 5 am...give a girl som... | Not Suicide post | @belledame222 hey, it's 5 am...give a girl som... | [belledame222, hey, it, s, 5, am, give, a, gir... | ['belledame222', 'hey', '5', 'give', 'girl', '... | [belledame222, hey, give, girl, some, credit, ... | belledame222 hey give girl some credit trying |
| 1785 | 2 drunken besties stumble into my room and we ... | Not Suicide post | 2 drunken besties stumble into my room and we ... | [2, drunken, besties, stumble, into, my, room,... | ['2', 'drunken', 'besties', 'stumble', 'my', '... | [drunken, besties, stumble, my, room, run, aro... | drunken besties stumble my room run around sob... |
| 1786 | @dancingbonita "I friggin love you!!!&quo... | Not Suicide post | @dancingbonita "i friggin love you!!!&quo... | [dancingbonita, quot, i, friggin, love, you, q... | ['dancingbonita', 'quot', 'friggin', 'love', '... | [dancingbonita, quot, friggin, love, you, quot... | dancingbonita quot friggin love you quot ron b... |
1787 rows × 7 columns
x
# Remove non-English tokens (mentions, emoji residue, link fragments,
# misspellings, ...) by keeping only tokens found in a vocabulary built
# from several NLTK corpora.
words = set(nltk.corpus.words.words())
# set.update() folds each corpus in at C speed instead of a Python-level
# add() call per word — identical resulting set, much faster.
words.update(reuters.words())
words.update(brown.words())
words.update(gutenberg.words())
df['nonEnglish'] = df['string'].apply(
    lambda text: " ".join(tok for tok in text.split() if tok in words)
)
df
|---|---|---|---|---|---|---|---|---|
| 0 | making some lunch | Not Suicide post | making some lunch | [making, some, lunch] | ['making', 'some', 'lunch'] | [making, some, lunch] | making some lunch | making some lunch |
| 1 | @Alexia You want his money. | Not Suicide post | @alexia you want his money. | [alexia, you, want, his, money] | ['alexia', 'you', 'want', 'money'] | [alexia, you, want, money] | alexia you want money | alexia you want money |
| 2 | @dizzyhrvy that crap took me forever to put to... | Potential Suicide post | @dizzyhrvy that crap took me forever to put to... | [dizzyhrvy, that, crap, took, me, forever, to,... | ['dizzyhrvy', 'crap', 'took', 'forever', 'put'... | [dizzyhrvy, crap, took, forever, put, together... | dizzyhrvy crap took forever put together iâ go... | crap took forever put together going go sleep ... |
| 3 | @jnaylor #kiwitweets Hey Jer! Since when did y... | Not Suicide post | @jnaylor #kiwitweets hey jer! since when did y... | [jnaylor, kiwitweets, hey, jer, since, when, d... | ['jnaylor', 'kiwitweets', 'hey', 'jer', 'since... | [jnaylor, kiwitweets, hey, jer, since, did, yo... | jnaylor kiwitweets hey jer since did you start... | hey since did you start twittering |
| 4 | Trying out "Delicious Library 2" wit... | Not Suicide post | trying out "delicious library 2" wit... | [trying, out, quot, delicious, library, 2, quo... | ['trying', 'quot', 'delicious', 'library', '2'... | [trying, quot, delicious, library, quot, mixed... | trying quot delicious library quot mixed resul... | trying quot delicious library quot mixed resul... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1782 | i have forgotten how much i love my Nokia N95-1 | Not Suicide post | i have forgotten how much i love my nokia n95-1 | [i, have, forgotten, how, much, i, love, my, n... | ['forgotten', 'much', 'love', 'my', 'nokia', '... | [forgotten, much, love, my, nokia, n95] | forgotten much love my nokia n95 | forgotten much love my |
| 1783 | Starting my day out with a positive attitude! ... | Not Suicide post | starting my day out with a positive attitude! ... | [starting, my, day, out, with, a, positive, at... | ['starting', 'my', 'day', 'positive', 'attitud... | [starting, my, day, positive, attitude, great,... | starting my day positive attitude great watch ... | starting my day positive attitude great watch ... |
| 1784 | @belledame222 Hey, it's 5 am...give a girl som... | Not Suicide post | @belledame222 hey, it's 5 am...give a girl som... | [belledame222, hey, it, s, 5, am, give, a, gir... | ['belledame222', 'hey', '5', 'give', 'girl', '... | [belledame222, hey, give, girl, some, credit, ... | belledame222 hey give girl some credit trying | hey give girl some credit trying |
| 1785 | 2 drunken besties stumble into my room and we ... | Not Suicide post | 2 drunken besties stumble into my room and we ... | [2, drunken, besties, stumble, into, my, room,... | ['2', 'drunken', 'besties', 'stumble', 'my', '... | [drunken, besties, stumble, my, room, run, aro... | drunken besties stumble my room run around sob... | drunken stumble my room run around sober drunk... |
| 1786 | @dancingbonita "I friggin love you!!!&quo... | Not Suicide post | @dancingbonita "i friggin love you!!!&quo... | [dancingbonita, quot, i, friggin, love, you, q... | ['dancingbonita', 'quot', 'friggin', 'love', '... | [dancingbonita, quot, friggin, love, you, quot... | dancingbonita quot friggin love you quot ron b... | quot love you quot riding unicorn |
1787 rows × 8 columns
# Lemmatisation: reduce each surviving token to its dictionary base form
# (e.g. "results" -> "result") via TextBlob's Word wrapper around WordNet.
def _lemmatise(text):
    return " ".join(Word(tok).lemmatize() for tok in text.split())

df['new_tweets'] = df['nonEnglish'].apply(_lemmatise)
df
|---|---|---|---|---|---|---|---|---|---|
| 0 | making some lunch | Not Suicide post | making some lunch | [making, some, lunch] | ['making', 'some', 'lunch'] | [making, some, lunch] | making some lunch | making some lunch | making some lunch |
| 1 | @Alexia You want his money. | Not Suicide post | @alexia you want his money. | [alexia, you, want, his, money] | ['alexia', 'you', 'want', 'money'] | [alexia, you, want, money] | alexia you want money | alexia you want money | alexia you want money |
| 2 | @dizzyhrvy that crap took me forever to put to... | Potential Suicide post | @dizzyhrvy that crap took me forever to put to... | [dizzyhrvy, that, crap, took, me, forever, to,... | ['dizzyhrvy', 'crap', 'took', 'forever', 'put'... | [dizzyhrvy, crap, took, forever, put, together... | dizzyhrvy crap took forever put together iâ go... | crap took forever put together going go sleep ... | crap took forever put together going go sleep day |
| 3 | @jnaylor #kiwitweets Hey Jer! Since when did y... | Not Suicide post | @jnaylor #kiwitweets hey jer! since when did y... | [jnaylor, kiwitweets, hey, jer, since, when, d... | ['jnaylor', 'kiwitweets', 'hey', 'jer', 'since... | [jnaylor, kiwitweets, hey, jer, since, did, yo... | jnaylor kiwitweets hey jer since did you start... | hey since did you start twittering | hey since did you start twittering |
| 4 | Trying out "Delicious Library 2" wit... | Not Suicide post | trying out "delicious library 2" wit... | [trying, out, quot, delicious, library, 2, quo... | ['trying', 'quot', 'delicious', 'library', '2'... | [trying, quot, delicious, library, quot, mixed... | trying quot delicious library quot mixed resul... | trying quot delicious library quot mixed resul... | trying quot delicious library quot mixed resul... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1782 | i have forgotten how much i love my Nokia N95-1 | Not Suicide post | i have forgotten how much i love my nokia n95-1 | [i, have, forgotten, how, much, i, love, my, n... | ['forgotten', 'much', 'love', 'my', 'nokia', '... | [forgotten, much, love, my, nokia, n95] | forgotten much love my nokia n95 | forgotten much love my | forgotten much love my |
| 1783 | Starting my day out with a positive attitude! ... | Not Suicide post | starting my day out with a positive attitude! ... | [starting, my, day, out, with, a, positive, at... | ['starting', 'my', 'day', 'positive', 'attitud... | [starting, my, day, positive, attitude, great,... | starting my day positive attitude great watch ... | starting my day positive attitude great watch ... | starting my day positive attitude great watch ... |
| 1784 | @belledame222 Hey, it's 5 am...give a girl som... | Not Suicide post | @belledame222 hey, it's 5 am...give a girl som... | [belledame222, hey, it, s, 5, am, give, a, gir... | ['belledame222', 'hey', '5', 'give', 'girl', '... | [belledame222, hey, give, girl, some, credit, ... | belledame222 hey give girl some credit trying | hey give girl some credit trying | hey give girl some credit trying |
| 1785 | 2 drunken besties stumble into my room and we ... | Not Suicide post | 2 drunken besties stumble into my room and we ... | [2, drunken, besties, stumble, into, my, room,... | ['2', 'drunken', 'besties', 'stumble', 'my', '... | [drunken, besties, stumble, my, room, run, aro... | drunken besties stumble my room run around sob... | drunken stumble my room run around sober drunk... | drunken stumble my room run around sober drunk... |
| 1786 | @dancingbonita "I friggin love you!!!&quo... | Not Suicide post | @dancingbonita "i friggin love you!!!&quo... | [dancingbonita, quot, i, friggin, love, you, q... | ['dancingbonita', 'quot', 'friggin', 'love', '... | [dancingbonita, quot, friggin, love, you, quot... | dancingbonita quot friggin love you quot ron b... | quot love you quot riding unicorn | quot love you quot riding unicorn |
1787 rows × 9 columns
# Token-count distribution for the cleaned tweets: a describe() summary
# table on the left beside a histogram of lengths on the right.
df['length'] = df.new_tweets.str.split().apply(len)

fig = plt.figure(figsize=(14, 7))

hist_ax = fig.add_subplot(122)
sns.histplot(df['length'], ax=hist_ax, color='green')

stats = df.length.describe().to_frame().round(2)
table_ax = fig.add_subplot(121)
table_ax.axis('off')
summary_table = table_ax.table(
    cellText=stats.values,
    rowLabels=stats.index,
    colLabels=stats.columns,
    bbox=[0, 0, 1, 1],
)
summary_table.set_fontsize(12)

fig.suptitle('Distribution of text length for tweets', fontsize=16)
plt.show()

# Class balance of the target label.
sns.set_theme(style="whitegrid")
sns.countplot(x=df["Suicide"])
def _plot_top_words(frame, title):
    """Bar-chart the 20 most frequent tokens in frame['short_word']."""
    counts = Counter(item for sublist in frame['short_word'] for item in sublist)
    top = pd.DataFrame(counts.most_common(20), columns=['Common_words', 'count'])
    fig = px.bar(top, x="count", y="Common_words", title=title,
                 orientation='h', width=700, height=700, color='Common_words')
    fig.show()


def _show_wordcloud(texts):
    """Render a word cloud for an iterable of already-cleaned strings."""
    joined = ' '.join(text for text in texts)
    cloud = WordCloud(width=800, height=500, random_state=21,
                      max_font_size=110).generate(joined)
    plt.figure(figsize=(10, 7))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis('off')
    plt.show()


# Frequency charts: whole corpus first, then each class separately.
# (Titles also fix the original "Commmon" typo in the displayed charts.)
_plot_top_words(df, 'Common words in selected text')
Positive = df[df['Suicide'] == 'Not Suicide post']
Suicidal = df[df['Suicide'] == 'Potential Suicide post']
_plot_top_words(Positive, 'Most common words in positive posts')
_plot_top_words(Suicidal, 'Most common words in suicidal posts')

# Word clouds: whole corpus, then each class separately.
_show_wordcloud(df['new_tweets'])
_show_wordcloud(df['new_tweets'][df['Suicide'] == 'Not Suicide post'])
_show_wordcloud(df['new_tweets'][df['Suicide'] == 'Potential Suicide post'])